from gensim.models import word2vec
import logging

## Train a word2vec model
class train_word2vec:
    """Train a word2vec model on a text corpus and export its word vectors.

    Every statement sits directly in the class body, so the whole pipeline
    (logging setup, corpus loading, training, export) executes once, at the
    moment this class definition is evaluated. The corpus reader and the
    trained model remain reachable afterwards as the class attributes
    ``sentences`` and ``model``.
    """

    # Configure root logging at INFO level, formatted as time:level:message.
    logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', level=logging.INFO)

    # Load the already word-segmented text through the Text8Corpus class.
    # NOTE(review): this is a raw string, so the doubled backslashes stay in
    # the path literally; presumably harmless on Windows -- confirm before
    # normalizing, since that would change the runtime value.
    sentences = word2vec.Text8Corpus(r'C:\\Users\86176\Desktop\\corpusSegDone_1.txt')

    # Train the model; only a subset of the available parameters is set here.
    # NOTE(review): `size` looks like the gensim 3.x spelling of the vector
    # dimensionality keyword (newer releases use `vector_size`) -- confirm
    # against the installed gensim version.
    model = word2vec.Word2Vec(sentences, size=100, hs=1, min_count=1, window=3)

    # Disabled example queries against the trained model:
    # y1 = model.similarity(u"利奇马", u"天兔")
    # print(u"相似度为：", y1)

    # y2 = model.most_similar(u"利奇马", topn=20)  # the 20 most similar words
    # print(u"和利奇马最相关的词有：\n")
    # for item in y2:
    #     print(item)

    # Export the vectors in word2vec format. The path is a raw string so that
    # "\8" and "\D" are not parsed as (invalid) escape sequences; the resulting
    # runtime value is identical to the previous non-raw literal.
    model.wv.save_word2vec_format(r'C:\Users\86176\Desktop\word2vec.vector')

